Survey Completion Rates by District

## Page 1
library(sf)
library(tidyverse)


##Import data

## Read the district-level targets (first worksheet), drop rows without a
## district code and round the coverage percentage to a whole number.
table_by_district_gp <- xlsx::read.xlsx(
  file = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/district_targets.xlsx",
  sheetIndex = 1
) %>%
  filter(!is.na(DistrictCode)) %>%
  mutate(Percentage.GPs.Covered = round(Percentage.GPs.Covered))




## Read the block-level targets (first worksheet), drop rows without a
## district code and round the coverage percentage to a whole number.
table_by_block_gp <- xlsx::read.xlsx(
  file = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/block_targets.xlsx",
  sheetIndex = 1
) %>%
  filter(!is.na(DistrictCode)) %>%
  mutate(Percentage.GPs.Covered = round(Percentage.GPs.Covered))



# Import the district-to-division lookup (first worksheet). Only the join key
# (renamed to DistrictCode and stored as character) and the division label
# are kept.
division_labels <- xlsx::read.xlsx(
  file = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/District_division_codes.xls",
  sheetIndex = 1
) %>%
  select(DistrictCode = District.Code, Division) %>%
  mutate(DistrictCode = as.character(DistrictCode))

## Add the administrative division to the block- and district-level tables.
## right_join() keeps every row of the targets table. The key is named
## explicitly so that a change in the columns shared by the two tables cannot
## silently change which rows match.
table_by_block_gp <- right_join(
  division_labels,
  table_by_block_gp,
  by = "DistrictCode"
)
table_by_district_gp <- right_join(
  division_labels,
  table_by_district_gp,
  by = "DistrictCode"
)
## Import the gadm36_IND_2 shapefile layer and keep only the features whose
## NAME_1 is Karnataka.
karnataka_shp_files <- read_sf(
  dsn = "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Data/Shp Files Dist/gadm36_IND_2.shp",
  layer = "gadm36_IND_2"
) %>%
  filter(NAME_1 == "Karnataka")

## Fix district names
## The shapefile's NAME_2 values are upper-cased and then mapped to the
## spellings in the lookup below (old spelling = new spelling). Names that are
## not in the lookup are left as they are. Two old spellings (KALBURGI and
## GULBARGA) map to the same district.
district_name_fixes <- c(
  "BAGALKOT" = "BAGALKOTE",
  "BANGALORE RURAL" = "BENGALURU RURAL",
  "BANGALORE" = "BENGALURU",
  "DHARWAD" = "DHARWAR",
  "KALBURGI" = "KALABURAGI",
  "KOLARA" = "KOLAR",
  "BELGAUM" = "BELAGAVI",
  "BELLARY" = "BALLARI",
  "BIJAPUR" = "VIJAYAPURA",
  "CHAMRAJNAGAR" = "CHAMARAJANAGARA",
  "CHIKBALLAPURA" = "CHIKKABALLAPURA",
  "GULBARGA" = "KALABURAGI",
  "MYSORE" = "MYSURU",
  "SHIMOGA" = "SHIVAMOGGA",
  "CHIKMAGALUR" = "CHIKKAMAGALURU",
  "TUMKUR" = "TUMAKURU"
)

karnataka_shp_files <- karnataka_shp_files %>%
  mutate(
    NAME_2 = toupper(NAME_2),
    # Indexing the lookup by name yields NA for names without a fix;
    # coalesce() then falls back to the original name.
    NAME_2 = coalesce(unname(district_name_fixes[NAME_2]), NAME_2)
  )

# DistrictName duplicates the cleaned NAME_2 column.
karnataka_shp_files$DistrictName <- karnataka_shp_files$NAME_2

# Reproject the district polygons to EPSG:4326.
karnataka_shp_files <- st_transform(karnataka_shp_files, 4326)

# Attach the district-level coverage figures to the polygons. left_join()
# keeps every polygon, so a district without a match in the table gets NA
# values. The key is named explicitly rather than relying on whichever
# columns the two inputs happen to share.
karnataka_shp_files_district_response <- left_join(
  karnataka_shp_files,
  table_by_district_gp,
  by = "DistrictName"
)
## Division shapes: union the district geometries within each Division value.
karnataka_shp_files_district_response_div <- summarise(
  group_by(karnataka_shp_files_district_response, Division),
  geometry = st_union(geometry)
)
# Columns of the block-level table that are shown to the user.
dat_block <- select(
  table_by_block_gp,
  DistrictName,
  BlockName,
  Total.GPs.in.the.Block,
  Total.GPs.covered.in.the.Block,
  Total.surveys.completed.in.the.Block,
  Percentage.GPs.Covered,
  Division
)

###
# Same selection at the district level.
dat_dist <- select(
  table_by_district_gp,
  DistrictName,
  Total.GPs.in.the.District,
  Total.GPs.covered.in.the.District,
  Total.surveys.completed.in.the.District,
  Percentage.GPs.Covered,
  Division
)
###

# Wrap the block table in a SharedData object (group "abSelector"); the
# table and filter widgets created later are built on this object.
shared_dat_block <- SharedData$new(data = dat_block, group = "abSelector")

# shared_dat_dist <- SharedData$new(data=dat_dist,~DistrictName, group="abSelector")
# Interactive district map. Each polygon is filled by the percentage of GPs
# covered, labelled with that percentage, and shows the district name as a
# tooltip. Clicking a polygon calls the JavaScript function filterOn() with
# the district name.
# NOTE(review): filterOn() is not defined in the visible part of this file;
# confirm it is provided by the dashboard's JavaScript.
p <- ggplot() +
  # geom_sf(data=karnataka_shp_files_district_response_div,aes(color=Division),alpha=1,stroke=2,lwd=2)+
  # scale_color_manual(values=c("darkorange","yellow","lightblue","darkgreen"))+
  geom_sf_interactive(
    data = karnataka_shp_files_district_response,
    aes(
      fill = Percentage.GPs.Covered,
      data_id = DistrictName,
      onclick = paste0("filterOn(\"", DistrictName, "\")"),
      tooltip = DistrictName
    ),
    alpha = .6
  ) +
  theme_void() +
  scale_fill_gradient(
    name = "",
    low = "white",
    high = "darkorange",
    guide = guide_colorbar(reverse = FALSE, title = "Percentage of GPs Covered")
  ) +
  # `labels` is spelled out in full; the previous `label =` only worked
  # through partial argument matching.
  scale_y_continuous(labels = abs) +
  scale_x_continuous(labels = abs) +
  theme(
    panel.border = element_rect(colour = "grey", fill = NA, size = 0.1),
    rect = element_blank(),
    panel.grid.major = element_line(color = "white"),
    axis.ticks = element_line()
  ) +
  geom_sf_text_interactive(
    data = karnataka_shp_files_district_response,
    aes(label = Percentage.GPs.Covered, tooltip = DistrictName),
    size = 3
  )

# Convert the ggplot object into an interactive girafe widget.
g <- girafe(ggobj = p)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not give correct
## results for longitude/latitude data
# Block-level coverage table built on the shared (filterable) data.
# An earlier datatable() call that stood here supplied eight header labels
# (with "Total GPs covered in the Block" repeated) for the seven data columns
# while the row-name column was still displayed, so every header was shifted
# by one. Its result was overwritten before being used, so only the correct
# definition below is kept.
# rt_block <- reactable(
#     shared_dat_block,
#     elementId = "ABtable",
#     columns = list(
#     DistrictName = colDef(name = "District Name"),
#     BlockName = colDef(name = "Block Name"),
#     Total.GPs.in.the.Block = colDef(name = "Total GPs in the Block"),
#     Total.GPs.covered.in.the.Block=colDef(name="Total GPs covered in the Block"),
#     Total.surveys.completed.in.the.Block = colDef(name = "Total surveys completed in the Block"),
# 
#     Percentage.GPs.Covered=colDef(name = "Percentage GPs Covered",format=colFormat(percent = FALSE,digits = 1),style = function(value) {
#     if (value < 20) {
#       color <- "#white"
#     } else if (value >80) {
#       color <- "darkorange"
#     } else {
#       color <- "#ffead2"
#     }
#     list(color = color, fontWeight = "bold")
#   })
#   )
# ,showPageSizeOptions=TRUE)


# One label per column of dat_block, in column order; row names are hidden.
rt_block <- datatable(
  shared_dat_block,
  colnames = c(
    "District Name",
    "Block Name",
    "Total GPs in the Block",
    "Total GPs covered in the Block",
    "Total surveys completed in the Block",
    "Percentage GPs Covered",
    "Division"
  ),
  rownames = FALSE
)

###
# rt_dist <- reactable(
#     shared_dat_dist,
#     elementId = "ABtableDist",
#     columns = list(
#     DistrictName = colDef(name = "District Name"),
#     Total.GPs.in.the.District = colDef(name = "Total GPs in the  District"),
#     Total.GPs.covered.in.the.District=colDef(name="Total GPs covered in the  District"),
#     Total.surveys.completed.in.the.District = colDef(name = "Total surveys completed in the  District"),
# 
#     Percentage.GPs.Covered=colDef(name = "Percentage GPs Covered",format=colFormat(percent = FALSE,digits = 1),style = function(value) {
#     if (value < 20) {
#       color <- "#white"
#     } else if (value >80) {
#       color <- "#8b0101"
#     } else {
#       color <- "#ffead2"
#     }
#     list(color = color, fontWeight = "bold")
#   })
#   )
# )


## Define the dropdown filters on the shared block table: one by division,
## one by district; both allow several values to be selected.
fs_div <- filter_select(
  id = "DivFilter",
  label = "Select Division",
  sharedData = shared_dat_block,
  group = ~Division,
  multiple = TRUE
)
fs_dist <- filter_select(
  id = "letterFilter",
  label = "Select District",
  sharedData = shared_dat_block,
  group = ~DistrictName,
  multiple = TRUE
)

##Layout  

# bscols(
#        list(fs_div,fs_dist, g),
#    list(rt_block)
# )

### Row

# Print the block-level coverage table.
rt_block
##Import pdo survey data

library(haven)
library(stringr)

# Cleaned PDO survey responses (Stata file).
pdo_survey_data <- read_dta(
  "C:/Users/ASUS/Dropbox/Karnataka Research/Analysis/2_Intermediate/Do/PDO Survey/PDO Survey Deployment/Output/PDO_survey_cleaned.dta"
)

# District code lookup, first worksheet.
# NOTE(review): this path is relative to the working directory, unlike the
# absolute paths used for the other inputs; confirm it resolves as intended.
district_codes <- xlsx::read.xlsx(
  file = "../Output/district_codes.xlsx",
  sheetIndex = 1
)
## Districts into division
## source : Wikipedia
## Each object below is a one-element list holding the upper-cased district
## names of one division. Two of the strings carry a " District" suffix on
## all but the last name, which is why they are split on " DISTRICT, ".
bangalore_division <- strsplit(
  toupper("BENGALURU, BENGALURU Rural, CHIKKABALLAPURA, Chitradurga, Davanagere, Kolar, Ramanagara, SHIVAMOGGA, Tumakuru"),
  split = ", ",
  fixed = TRUE
)

belagavi_divison <- strsplit(
  toupper("BAGALKOTE District, Belagavi District, VIJAYAPURA District, DHARWAR District, Gadag District, Haveri District, Uttara Kannada"),
  split = " DISTRICT, ",
  fixed = TRUE
)

Kalaburagi_division <- strsplit(
  toupper("BALLARI District, Bidar District, Kalaburagi District, Koppal District, Raichur District, Yadgir District, Vijayanagar"),
  split = " DISTRICT, ",
  fixed = TRUE
)

Mysore_division <- strsplit(
  toupper("CHAMARAJANAGARA, CHIKKAMAGALURU, Dakshina Kannada, Hassan, Kodagu, Mandya, MYSURU, Udupi"),
  split = ", ",
  fixed = TRUE
)

# Label every district with its administrative division. All branches use
# the data-masked DistrictName column (the first branch previously reached
# back into district_codes$DistrictName, which bypasses the pipeline's data).
# Districts that appear in none of the four lists get NA, because
# case_when() has no fallback branch here.
district_codes <- district_codes %>%
  mutate(
    DivisionName = case_when(
      DistrictName %in% unlist(bangalore_division) ~ "Bangalore",
      DistrictName %in% unlist(belagavi_divison) ~ "Belagavi",
      DistrictName %in% unlist(Kalaburagi_division) ~ "Kalaburagi",
      DistrictName %in% unlist(Mysore_division) ~ "Mysore"
    )
  )

# Rename the survey's District column to DistrictCode and store it as an
# integer before joining.
pdo_survey_data <- pdo_survey_data %>%
  rename(DistrictCode = District) %>%
  mutate(DistrictCode = as.integer(DistrictCode))

# Add district and division names. inner_join() drops responses whose
# district code is missing from district_codes. The key is named explicitly
# instead of being inferred from shared column names.
pdo_survey_data <- inner_join(
  pdo_survey_data,
  district_codes,
  by = "DistrictCode"
)
### Remove duplicates
## Make a single variable holding the panchayat(s) of a response by
## concatenating the single- and multiple-choice columns.
pdo_survey_data$Panchayat_all_multiple <- str_c(
  pdo_survey_data$Panchayat,
  pdo_survey_data$Panchayat_multiple,
  sep = ""
)

## Separate on spaces, drop empty tokens, sort, then concatenate again, so
## that the same set of panchayats always yields the same key regardless of
## the order in which it was entered.
## vapply() guarantees exactly one string per row; the previous chained
## sapply() calls could simplify to a matrix when every row had the same
## number of tokens and then return a vector of the wrong length.
sorted_multiple_panchayats <- vapply(
  str_split(pdo_survey_data$Panchayat_all_multiple, " "),
  function(tokens) paste(sort(tokens[tokens != ""]), collapse = " "),
  character(1)
)

pdo_survey_data$Panchayat_all_multiple <- sorted_multiple_panchayats

##Now clean the end date to find the last survey for each 

library(lubridate)
# Parse the survey end time into a date-time so responses can be ordered.
pdo_survey_data$endtime_cleaned <- ymd_hms(pdo_survey_data$endtime)


### Work at the PDO level for the section questions.
### Duplicates are dropped per panchayat key by keeping the response with the
### latest end time: rows are sorted newest first, then the first row of each
### key is taken. The result stays grouped by Panchayat_all_multiple.
pdo_survey_data_pdo_level <- pdo_survey_data %>%
  arrange(desc(endtime_cleaned)) %>%
  group_by(Panchayat_all_multiple) %>%
  slice(1)


## Convert the labelled survey variables to factors.
pdo_survey_data_pdo_level <- pdo_survey_data_pdo_level %>%
  mutate(
    gender = haven::as_factor(gender),
    education = haven::as_factor(education),
    rural = haven::as_factor(rural)
  )

## Fix gender levels
## The levels are overwritten by position.
## NOTE(review): this assumes gender has exactly four levels in this order;
## confirm against the value labels in the Stata file.
levels(pdo_survey_data_pdo_level$gender) <- c(
  "Female",
  "Male",
  "Other",
  "Prefer Not to Say"
)

Who the PDOs are

##shared data for this section
# karnataka_shp_files_district_response_div <-karnataka_shp_files_district_response_div%>%
#   rename(DivisionName=Division)

##Division Plot
# division_shp<-ggplot() +
#   geom_sf_interactive(data = karnataka_shp_files_district_response_div,
#                            aes(fill=DivisionName , data_id = DivisionName ,
#                             onclick = paste0("filterOn(\"",DivisionName,"\")"),tooltip=DivisionName),
#                            alpha = .6) + theme_void()+
#   scale_color_manual(values=c("darkorange","yellow","lightblue","darkgreen")) +
#       scale_y_continuous(label = abs)  +
#         scale_x_continuous(label = abs) + theme(panel.border = element_rect(colour = "grey", fill=NA, size=0.1),rect = element_blank(),panel.grid.major = element_line(color = "white"),axis.ticks = element_line()) 
# 
# division_shp <- girafe(ggobj =division_shp)

## Percent rural
## Share of each `rural` answer within every division: count of
## (rural, division) divided by the count of the division, one row per
## combination. The count columns are named explicitly instead of relying on
## add_count()'s automatic `n` / `nn` naming.
rural_percent <- pdo_survey_data_pdo_level %>%
  ungroup() %>%
  add_count(rural, DivisionName, name = "n_rural_division") %>%
  add_count(DivisionName, name = "n_division") %>%
  mutate(rural_perc = n_rural_division / n_division) %>%
  select(DivisionName, rural_perc, rural) %>%
  distinct()
## Percent gender
## Share of Female / Male respondents within every division (other answers
## are excluded before counting), one row per combination. The count columns
## are named explicitly instead of relying on add_count()'s automatic
## `n` / `nn` naming.
gender_percent <- pdo_survey_data_pdo_level %>%
  ungroup() %>%
  filter(gender %in% c("Female", "Male")) %>%
  add_count(gender, DivisionName, name = "n_gender_division") %>%
  add_count(DivisionName, name = "n_division") %>%
  mutate(gender_perc = n_gender_division / n_division) %>%
  select(DivisionName, gender_perc, gender) %>%
  distinct()
# Percent education
# Share of each listed education level within every division (responses with
# any other level are excluded before counting), one row per combination.
# The count columns are named explicitly instead of relying on add_count()'s
# automatic `n` / `nn` naming.
edu_percent <- pdo_survey_data_pdo_level %>%
  ungroup() %>%
  filter(education %in% c("10th pass","PUC","Diploma","Bachelor’s Degree","Master’s Degree","PhD")) %>%
  add_count(education, DivisionName, name = "n_education_division") %>%
  add_count(DivisionName, name = "n_division") %>%
  mutate(edu_perc = n_education_division / n_division) %>%
  select(DivisionName, edu_perc, education) %>%
  distinct()
## Combine the percent tables on the division name.
## Each input has several rows per division, so these joins produce every
## education x gender x rural combination within a division; each individual
## percentage is therefore repeated across many rows of the result.
## The key is named explicitly instead of being inferred.
percent_pdo_data <- inner_join(
  edu_percent,
  gender_percent,
  by = "DivisionName"
)
percent_pdo_data <- inner_join(
  rural_percent,
  percent_pdo_data,
  by = "DivisionName"
)
# ##Division Filter
# ##shared data full survey
# shared_pd_data<- SharedData$new(pdo_survey_data_pdo_level,group = "sec1div")
# shared_percent_data<- SharedData$new(percent_pdo_data,group = "sec1div")
# 
# fs_div1<-filter_select("DivFilter2", "Select Division", shared_percent_data, group=~DivisionName, multiple=TRUE)

#Gender plot

 # Dodged bars of the gender share per division, restricted to the
 # Female / Male categories on the x axis.
 gender_plot <- ggplot(
   data = percent_pdo_data,
   mapping = aes(x = gender, y = gender_perc, fill = DivisionName)
 ) +
   geom_col(position = "dodge") +
   scale_x_discrete(limits = c("Female", "Male")) +
   scale_fill_manual(
     values = c("darkorange", "yellow", "lightblue", "darkgreen"),
     name = "Administrative Division"
   ) +
   labs(y = "Percent", fill = "Division", title = "Gender Distribution") +
   scale_y_continuous(labels = scales::percent) +
   theme_bw() +
   theme(axis.title.x = element_blank())

 # Convert to an interactive plotly widget; the tooltip shows gender_perc.
 gender_plot <- ggplotly(gender_plot, tooltip = "gender_perc")

  
  #Rural Plot
 # Dodged bars of the `rural` answer share per division.
 rural_plot <- ggplot(
   data = percent_pdo_data,
   mapping = aes(x = rural, y = rural_perc, fill = DivisionName)
 ) +
   geom_col(position = "dodge") +
   scale_fill_manual(
     values = c("darkorange", "yellow", "lightblue", "darkgreen"),
     name = "Administrative Division"
   ) +
   labs(
     y = "Percent",
     fill = "Division",
     title = "Whether a PDO hails from a rural area"
   ) +
   scale_y_continuous(labels = scales::percent) +
   theme_bw() +
   theme(axis.title.x = element_blank())

 # Convert to an interactive plotly widget; the tooltip shows rural_perc.
 rural_plot <- ggplotly(rural_plot, tooltip = "rural_perc")
  
  
 
### Age Distribution
# Overlaid, semi-transparent density curves of age, one per division. The
# x axis is limited to 18-50, so ages outside that range are not drawn.
age_plot <- ggplot(
  pdo_survey_data_pdo_level,
  aes(x = age, fill = DivisionName)
) +
  geom_density(adjust = 5, alpha = 0.3, position = "identity") +
  scale_x_continuous(breaks = seq(18, 50, by = 2), limits = c(18, 50)) +
  xlab("Age") +
  scale_y_continuous(labels = scales::percent, name = "Percentage") +
  labs(title = "Age distribution of the PDOs by Division") +
  scale_fill_manual(
    values = c("darkorange", "yellow", "lightblue", "darkgreen"),
    name = "Administrative Division"
  ) +
  theme_bw()


# Convert to an interactive plotly widget; the tooltip shows age.
age_plot <- ggplotly(age_plot, tooltip = "age")
## Warning: Removed 903 rows containing non-finite values (stat_density).
## Edu level


# Dodged bars of the education-level share per division; the x labels are
# rotated so that the long level names stay readable.
edu_plot <- ggplot(
  percent_pdo_data,
  aes(x = education, y = edu_perc, fill = DivisionName)
) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent, name = "Percentage") +
  labs(title = "Education distribution of the PDOs by Division") +
  scale_fill_manual(
    values = c("darkorange", "yellow", "lightblue", "darkgreen"),
    name = "Administrative Division"
  ) +
  scale_x_discrete(name = "Highest Education Level") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 320, vjust = 0.5, hjust = 0.01))

# Convert to an interactive plotly widget; the tooltip shows edu_perc.
edu_plot <- ggplotly(edu_plot, tooltip = "edu_perc")

Row

Men make up 75% of all PDOs

gender_plot

The age density peaks at around 35-40 years

age_plot

Row

Most PDOs have a college degree

edu_plot

Most PDOs hail from villages

rural_plot

Work Experience

Their Feedback

Focus Areas

Time-Use

Covid Management